# 加载class包
if (!require(class)) install.packages("class")
library(class)
library(caret)
library(pROC)
library(MatchIt)

# Switch to the configured working directory and read the input table
setwd("${path}")
mydata <- read.csv(file = "data.csv")

# Recode ${hotspot} as a two-level factor:
# raw value "0" becomes "event_0", raw value "1" becomes "event_1"
mydata$${hotspot} <- factor(
  mydata$${hotspot},
  levels = c("0", "1"),
  labels = c("event_0", "event_1")
)

# 1:3 nearest-neighbour matching with matchit()
m.out <- matchit(
  formula = ${hotspot} ~ ${independent_and},
  data = mydata,
  method = "nearest",
  ratio = 3
)

# Pull out the matched sample and drop every row that contains a missing value
matched_data <- na.omit(match.data(m.out))

# Numeric copy of the outcome: 1 for "event_1", 0 otherwise
matched_data$${hotspot}_num <- as.numeric(matched_data$${hotspot} == "event_1")
# Fit and score KNN on the matched (modelling) data.
# NOTE: the hold-out split below is disabled, so the model is scored on the
# same rows it was trained on; the numbers reported here are resubstitution
# results, not out-of-sample performance.
set.seed(123)  # knn() breaks voting ties at random; fix the seed
#inTrain <- createDataPartition(matched_data$${hotspot}, p = 0.8, list = FALSE)
#train_set <- matched_data[inTrain, ]
#test_set <- matched_data[-inTrain, ]

# Predict with k = 5. drop = FALSE keeps a single predictor column as a
# one-column data frame; a bare vector passed as `test` would be treated by
# knn() as one observation.
predictions_knn <- knn(
  train = matched_data[, c(${independent_dou}), drop = FALSE],
  test = matched_data[, c(${independent_dou}), drop = FALSE],
  cl = matched_data$${hotspot},
  k = 5,
  prob = TRUE  # attach the vote proportion of the winning class
)

# The "prob" attribute is the vote share of the *predicted* class; convert it
# to the probability of "event_1"
attr_prob <- attr(predictions_knn, "prob")
predicted_probs_knn <- ifelse(
  predictions_knn == "event_1",
  attr_prob,
  1 - attr_prob
)
predicted_classes_knn <- predictions_knn

# Append the predictions to the matched data. Base R assignment is used
# because no attached package provides %>% / mutate() / filter() at this
# point in the script (filter() would resolve to stats::filter()).
mydata_with_predictions_knn <- matched_data
mydata_with_predictions_knn$${hotspot}_probability <- predicted_probs_knn
mydata_with_predictions_knn$${hotspot}_predicted_class <- predicted_classes_knn
mydata_with_predictions_knn$${hotspot}_score <- NA  # KNN has no scorecard

# Number of rows whose predicted class equals the observed class
matching_rows <- sum(
  mydata_with_predictions_knn$${hotspot} ==
    mydata_with_predictions_knn$${hotspot}_predicted_class,
  na.rm = TRUE
)
cat("Number of rows where ${hotspot} and ${hotspot}_predicted_class match:", matching_rows, "\n")

# Accuracy = matching rows / total rows
total_rows <- nrow(matched_data)
accuracy <- matching_rows / total_rows
cat("Accuracy (matching rows / total rows) for ${hotspot}:", accuracy, "\n")

# Write the modelling-set results
write.csv(mydata_with_predictions_knn, file = "mydata_with_predictions_knn.csv", row.names = FALSE)

# 加载所需的库
if (!require(caret)) install.packages("caret")
if (!require(pROC)) install.packages("pROC")
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(yardstick)) install.packages("yardstick")
if (!require(ModelMetrics)) install.packages("ModelMetrics")
if (!require(caretEnsemble)) install.packages("caretEnsemble")
if (!require(plotly)) install.packages("plotly")

library(class)
library(caret)
library(pROC)
library(ggplot2)
library(yardstick)
library(ModelMetrics)
library(caretEnsemble)
library(plotly)
#### Confusion matrix (disabled) ####
# conf_matrix <- confusionMatrix(as.factor(predicted_classes_knn), as.factor(mydata_with_predictions_knn$${hotspot}))
# sink("confusionMatrix_knn.txt")
# print(conf_matrix)
# sink()

#### ROC curve ####
# Build the ROC curve and compute its AUC.
# pROC functions are called with an explicit namespace because ModelMetrics,
# attached after pROC, also exports auc() and masks pROC's generic.
# direction = "<" fixes the orientation (controls score lower than cases) so
# pROC cannot auto-flip the curve.
roc_curve <- pROC::roc(
  response = mydata_with_predictions_knn$${hotspot}_num,
  predictor = predicted_probs_knn,
  levels = c(0, 1),
  direction = "<"
)
auc_value <- pROC::auc(roc_curve)
# as.numeric() drops the "auc" class so formatC() does not warn about it
auc_label <- paste0("AUC = ", formatC(as.numeric(auc_value), digits = 3, format = "f"))

# ROC curve with the chance diagonal and an AUC label
roc_plot <- pROC::ggroc(roc_curve, legacy.axes = TRUE) +
  # Chance diagonal
  annotate("segment", x = 0, xend = 1, y = 0, yend = 1, linetype = "dashed", color = "grey60") +

  # AUC label
  annotate("text", x = 0.7, y = 0.3, label = auc_label, size = 5, fontface = "bold") +

  # Title and axis labels
  ggtitle("Receiver Operating Characteristic (ROC) Curve") +
  labs(x = "False Positive Rate", y = "True Positive Rate") +

  # Theme
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.major = element_line(color = "grey90"),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA)
  ) +

  # Line and fill colours
  scale_color_manual(values = "#1f78b4") +
  scale_fill_manual(values = "#1f78b4") +

  # 1:1 aspect ratio so the ROC square is not distorted
  coord_fixed(ratio = 1)

# 8 x 8 inches at 150 dpi (1200 x 1200 px). The previous width/height of 800
# combined with units = "in" requested an 800-inch canvas.
Cairo::CairoTIFF(file = "roc_knn.tiff", width = 8, height = 8, units = "in", dpi = 150)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source()
print(roc_plot)
dev.off()
#### Calibration plot ####
# Pair each predicted probability with the observed 0/1 outcome
calibration_data <- data.frame(
  PredictedProb = predicted_probs_knn,
  TrueClass = matched_data$${hotspot}_num
)

# Bin the predicted probabilities into 10 equal-width intervals
num_bins <- 10
calibration_data$bin <- cut(calibration_data$PredictedProb, breaks = num_bins, include.lowest = TRUE)

# Per bin: mean predicted probability and observed event rate
calibration_summary <- calibration_data %>%
  group_by(bin) %>%
  summarise(
    MeanPredictedProb = mean(PredictedProb, na.rm = TRUE),
    ActualRate = mean(TrueClass, na.rm = TRUE),
    .groups = 'drop'
  )

# Calibration curve with the perfect-calibration diagonal
calibration_plot <- ggplot(calibration_summary, aes(x = MeanPredictedProb, y = ActualRate)) +
  geom_point(size = 3, color = "#1f78b4") +  # one point per bin
  geom_line(color = "#1f78b4", size = 1) +   # connect the bins
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey60") +  # diagonal
  labs(title = "Calibration Plot",
       x = "Mean Predicted Probability",
       y = "Actual Rate") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.grid.major = element_line(color = "grey90"),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA)
  )

# 8 x 8 inches at 150 dpi (1200 x 1200 px). The previous width/height of 800
# combined with units = "in" requested an 800-inch canvas.
Cairo::CairoTIFF(file = "cal_knn.tiff", width = 8, height = 8, units = "in", dpi = 150)
# Explicit print(): ggplot objects are not auto-printed when the script is
# run through source()
print(calibration_plot)
dev.off()
#### K selection plot ####
# Make sure caret is available (createDataPartition)
if (!require(caret)) install.packages("caret")
library(caret)

# 80/20 hold-out split of the matched data.
# The split is stratified on the factor outcome; with the numeric 0/1 column
# createDataPartition() bins by quantiles, which may not separate the classes.
set.seed(123)
inTrain <- createDataPartition(matched_data$${hotspot}, p = 0.8, list = FALSE)
train_set <- matched_data[inTrain, ]
test_set <- matched_data[-inTrain, ]

# Candidate K values: odd numbers 1, 3, ..., 19
k_values <- seq(1, 20, by = 2)

# Hold-out accuracy for each K.
# The predictors come from the same template placeholder as every other knn()
# call in this script (previously a hard-coded column list). drop = FALSE
# keeps a single predictor as a one-column data frame.
accuracies <- vapply(k_values, function(k) {
  predictions_cv <- knn(
    train = train_set[, c(${independent_dou}), drop = FALSE],
    test = test_set[, c(${independent_dou}), drop = FALSE],
    cl = as.factor(train_set$${hotspot}),  # class labels must be a factor
    k = k,
    prob = FALSE  # only the predicted class is needed here
  )

  # Share of test rows whose predicted class equals the observed class
  mean(predictions_cv == as.character(test_set$${hotspot}), na.rm = TRUE)
}, numeric(1))

# Data frame behind the plot
k_plot_df <- data.frame(K = k_values, Accuracy = accuracies)

# Accuracy versus K
knn_plot <- ggplot(k_plot_df, aes(x = K, y = Accuracy)) +
  geom_line(color = "#2c7bb6", size = 0.8) +  # blue connecting line
  geom_point(color = "#d7191c", size = 3, shape = 21, fill = "white") +  # red circles, white fill
  labs(title = "Optimal K Selection for KNN Model",
       subtitle = "Accuracy of the model across different values of K",
       x = "K Value",
       y = "Accuracy") +
  theme_bw() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey85", size = 0.2),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white", color = NA),
    panel.border = element_rect(colour = "black", fill=NA, size=0.5)
  )

# 8 x 8 inches at 150 dpi (1200 x 1200 px). The previous width/height of 800
# combined with units = "in" requested an 800-inch canvas.
Cairo::CairoTIFF(file = "knn_knn.tiff", width = 8, height = 8, units = "in", dpi = 150)
# Render the plot into the TIFF device
print(knn_plot)
dev.off()
# Score the outcome data set (result.csv) with a KNN model trained on the
# matched data
mydata1 <- read.csv("result.csv")

# drop = FALSE keeps a single predictor column as a one-column data frame; a
# bare vector passed as `test` would be treated by knn() as one observation.
# prob = TRUE attaches the vote proportion of the winning class.
predictions_knn <- knn(
  train = matched_data[, c(${independent_dou}), drop = FALSE],
  test = mydata1[, c(${independent_dou}), drop = FALSE],
  cl = matched_data$${hotspot},
  k = 5,
  prob = TRUE
)

# Convert the winning-class vote share into the probability of "event_1",
# the same way the modelling-set predictions are derived (this replaces the
# previous fixed 0.9 / 0.1 placeholder values).
result_attr_prob <- attr(predictions_knn, "prob")
predicted_probs_knn <- ifelse(
  predictions_knn == "event_1",
  result_attr_prob,
  1 - result_attr_prob
)
predicted_classes_knn <- predictions_knn

# Append the predictions to the outcome data
result_with_predictions_knn <- mydata1
result_with_predictions_knn$${hotspot}_probability <- predicted_probs_knn
result_with_predictions_knn$${hotspot}_predicted_class <- predicted_classes_knn
result_with_predictions_knn$${hotspot}_score <- NA  # KNN has no scorecard

# Write the outcome-set results
write.csv(result_with_predictions_knn, file = "result_with_predictions_knn.csv", row.names = FALSE)
